{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"D2H6YLCPOXvZ","outputId":"462535d9-76e3-4985-a832-6e4d8350f270","executionInfo":{"status":"ok","timestamp":1709405923529,"user_tz":480,"elapsed":39697,"user":{"displayName":"Yu Wang","userId":"02802289530208391889"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting transformers==4.35.2\n","  Downloading transformers-4.35.2-py3-none-any.whl (7.9 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m69.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (3.13.1)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (0.20.3)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (1.25.2)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (23.2)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (2023.12.25)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (2.31.0)\n","Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (0.15.2)\n","Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (0.4.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.35.2) (4.66.2)\n","Requirement 
already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers==4.35.2) (2023.6.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers==4.35.2) (4.10.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.35.2) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.35.2) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.35.2) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.35.2) (2024.2.2)\n","Installing collected packages: transformers\n","  Attempting uninstall: transformers\n","    Found existing installation: transformers 4.38.1\n","    Uninstalling transformers-4.38.1:\n","      Successfully uninstalled transformers-4.38.1\n","Successfully installed transformers-4.35.2\n","Collecting datasets==2.15.0\n","  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m521.2/521.2 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (1.25.2)\n","Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (14.0.2)\n","Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (0.6)\n","Collecting dill<0.3.8,>=0.3.0 (from datasets==2.15.0)\n","  Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n","\u001b[2K     
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (1.5.3)\n","Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (2.31.0)\n","Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (4.66.2)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (3.4.1)\n","Collecting multiprocess (from datasets==2.15.0)\n","  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (2023.6.0)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (3.9.3)\n","Requirement already satisfied: huggingface-hub>=0.18.0 in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (0.20.3)\n","Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (23.2)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets==2.15.0) (6.0.1)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.15.0) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.15.0) (23.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.15.0) 
(1.4.1)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.15.0) (6.0.5)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.15.0) (1.9.4)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.15.0) (4.0.3)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.18.0->datasets==2.15.0) (3.13.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.18.0->datasets==2.15.0) (4.10.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets==2.15.0) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets==2.15.0) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets==2.15.0) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets==2.15.0) (2024.2.2)\n","INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. 
This could take a while.\n","  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m21.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets==2.15.0) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets==2.15.0) (2023.4)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets==2.15.0) (1.16.0)\n","Installing collected packages: dill, multiprocess, datasets\n","Successfully installed datasets-2.15.0 dill-0.3.7 multiprocess-0.70.15\n","Collecting accelerate==0.25.0\n","  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m265.7/265.7 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate==0.25.0) (1.25.2)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==0.25.0) (23.2)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate==0.25.0) (5.9.5)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate==0.25.0) (6.0.1)\n","Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==0.25.0) (2.1.0+cu121)\n","Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate==0.25.0) (0.20.3)\n","Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate==0.25.0) 
(0.4.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==0.25.0) (3.13.1)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==0.25.0) (4.10.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==0.25.0) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==0.25.0) (3.2.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==0.25.0) (3.1.3)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==0.25.0) (2023.6.0)\n","Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==0.25.0) (2.1.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate==0.25.0) (2.31.0)\n","Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate==0.25.0) (4.66.2)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate==0.25.0) (2.1.5)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate==0.25.0) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate==0.25.0) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate==0.25.0) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from 
requests->huggingface-hub->accelerate==0.25.0) (2024.2.2)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate==0.25.0) (1.3.0)\n","Installing collected packages: accelerate\n","Successfully installed accelerate-0.25.0\n","Requirement already satisfied: torch==2.1.0+cu121 in /usr/local/lib/python3.10/dist-packages (2.1.0+cu121)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.1.0+cu121) (3.13.1)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch==2.1.0+cu121) (4.10.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.1.0+cu121) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.1.0+cu121) (3.2.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.0+cu121) (3.1.3)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch==2.1.0+cu121) (2023.6.0)\n","Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.0+cu121) (2.1.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.1.0+cu121) (2.1.5)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.1.0+cu121) (1.3.0)\n","Collecting gdown==4.5.4\n","  Downloading gdown-4.5.4-py3-none-any.whl (14 kB)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from gdown==4.5.4) (3.13.1)\n","Requirement already satisfied: requests[socks] in /usr/local/lib/python3.10/dist-packages (from gdown==4.5.4) (2.31.0)\n","Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from gdown==4.5.4) (1.16.0)\n","Requirement already 
satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from gdown==4.5.4) (4.66.2)\n","Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown==4.5.4) (4.12.3)\n","Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown==4.5.4) (2.5)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.5.4) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.5.4) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.5.4) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.5.4) (2024.2.2)\n","Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.5.4) (1.7.1)\n","Installing collected packages: gdown\n","  Attempting uninstall: gdown\n","    Found existing installation: gdown 4.7.3\n","    Uninstalling gdown-4.7.3:\n","      Successfully uninstalled gdown-4.7.3\n","Successfully installed gdown-4.5.4\n","Downloading...\n","From: https://drive.google.com/uc?id=18oZZ4jqRK-uF-Nz6ftRdgNjKix88hrnO\n","To: /content/data_and_models.zip\n","100% 33.3M/33.3M [00:01<00:00, 32.9MB/s]\n","Archive:  data_and_models.zip\n","   creating: data_and_models/\n","  inflating: __MACOSX/._data_and_models  \n","  inflating: data_and_models/logistic_model_8.pkl  \n","  inflating: __MACOSX/data_and_models/._logistic_model_8.pkl  \n","  inflating: data_and_models/tfidf_44.pkl  \n","  inflating: __MACOSX/data_and_models/._tfidf_44.pkl  \n","  inflating: data_and_models/tfidf_8.pkl  \n","  inflating: __MACOSX/data_and_models/._tfidf_8.pkl  \n","  inflating: data_and_models/target_corpus.csv  \n","  inflating: 
__MACOSX/data_and_models/._target_corpus.csv  \n","  inflating: data_and_models/logistic_model_44.pkl  \n","  inflating: __MACOSX/data_and_models/._logistic_model_44.pkl  \n"]}],"source":["# Pinned environment for this notebook (developed on Colab: Python 3.10, CUDA 12.1 build of torch).\n","# %pip (rather than !pip) installs into the environment of the running kernel; -q keeps the log short.\n","%pip install -q transformers==4.35.2\n","%pip install -q datasets==2.15.0\n","%pip install -q accelerate==0.25.0\n","# The +cu121 build is not published on PyPI, so the PyTorch wheel index is added for this install.\n","%pip install -q torch==2.1.0+cu121 --extra-index-url https://download.pytorch.org/whl/cu121\n","\n","%pip install -q --upgrade --no-cache-dir gdown==4.5.4\n","\n","# Download data_and_models.zip from Google Drive (by file id) and unpack it into ./data_and_models/.\n","# -o overwrites existing files without prompting, so re-running this cell does not stall on unzip.\n","!gdown 18oZZ4jqRK-uF-Nz6ftRdgNjKix88hrnO\n","!unzip -o data_and_models.zip && rm data_and_models.zip"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000,"referenced_widgets":["1399ded814714874bf7622550ac07423","b96c7115fa504abdb76bdbfffd31e0af","35c5287188c549f6b9ce0f777149f2dc","267e881d0a1a4a6fbd8c21deb519a260","5c84b97e84034bec980580f33a9a6bfc","72072c7e97114dde8271c781b39155a1","6a3692f7dff64b2a82c265065b15c446","e78fabe0c6a74780911b3c2b6f33df2c","5dc13902568249ff85e516f2a17333c1","a4be2de6110b4656914c7768d07cae1d","a2f0a5652ca64036a259030b0967fbbc","c7c5c74e696e464e98138e19c31eab75","619f3702180f4869a2170de74d2fdad0","402fc3ce1f674c1eb7ea97c715802e7f","d274b4d469774cad91decd327312ac94","352bab9891af429c866847d7693ef76d","997c1ce0e8f44d6c968e9dd6e3ae6ef0","e0abcb7cfc6c4e028ac065cca4e6f121","aec2bb8e4738478a9e8e5d29f6ca56d2","6de45df067ed41878d2a670b88140511","33450be48ed54dc8818dadff77ed30ff","07a5d73897da452fb3a09ebd52d93e68","971cd36984984e878aaade86e425cf04","ba97db9e81ac4540b12d385835317bd2","0193884297004ef2ae6bb4c67cf31d9f","170f192382f14f299e9b5e91e726892b","37ea044cf91d4303a3cc68ba87972311","ebd9d09cd0584faeb445e37293d24c8e","451e43094a8c4845adc656e1f898b9dd","0b51cfc000e943e4b244d086f6f2033d","8393ec99d3d148ddb9c15caf69828c4f","98d4960a9b1a4c75970b53bbe0955e06","b69bd3def77c4f308d5cdd01972747f4","9eafe09e0b1e4e9a947c7800ea5e468d","2ddcd05359464b1bb66b6330276f88fb","ceee2d466ee34cbabf29b4d1f27b6367","518ae1d093f54f238d21ba6154c55045","1200ef5d626d4adb9a60cac26c4a91a4","3712dc0e37904c5eaad5215a4b8c57ec","2
4adcdeebbcc45c2a3e766851d8985a5","c7f057f02e854eb5ab8195b432515105","820d3affb6fb426fad1aa75190ab1f92","e4ea68ec942b4d1b893f3cd0a792285d","ed870e26cd614d85abb4491c153dc2d6","8fd6b9203631400d8d247b086041a865","2e77c4af19c64a89a691208338ce81e1","f6a945066f094c409bdb9fea3a43d33e","d6bd72852b954c758a4c0faef0ab9f50","520867658b7c47b3925fcf74c98f7830","44dc7af7b5904ce79ce9711fea820298","1cfb07d9ca074a4facf9973c93dbba43","3ba96d9f2ba3448caa2504146e51fddc","ab4bc3e607fd4899b2deb6e16697faf1","7cefbaf4b32140989d5c9331b61cfbe3","6f475b17907e4cd998968c74106f909a","3087ed5723b1459ba2761c95bfecdad6","ba71873c841747d496a73e562cf9aeb6","9580f30b3f0d4addaacdce9d125d3fd0","24e6f19792454c54a5d5e30307eaa047","41d7576cd212489d92897219fa1c1d00","fd6e6b64eefc4a06aec8f06cbbfdf6e1","c3652aae2e40413a88c97471e5e6ad81","713c2911fe5b423d882520e5fdb37697","894a2e47af1242ba8aae550ff504b912","9eeaa650a28b4417a334cd5322690d90","5808badb3c65493c9086a9f8d6e1bea6","98b842e25a7d4f80ab52e8681d1c59b5","8ab9f5047d3c4275989d5dcde5c78e41","c7468e53c6024fe3b95d7acca8ae9af8","44e1569086d5461ab4b05dfc0f85167e","625d1593c44e456b9ec24540c120445d","496cec653a024b35805aa1cf2abcb5ab","87603985724e4a17946c17cf1ff32ca3","a8a7c7eb2f5944328dc764de3a6ddcab","b38d5afa27bd42da899b097a79d58cbb","318870872ce24d3b8e09bab1c1346107","cec6b2f3b57d4f07a15d6806608ff010"]},"id":"H-LANn-hUlZh","outputId":"9bd10fed-1617-4119-ec69-8753aeccb235","executionInfo":{"status":"ok","timestamp":1709410409055,"user_tz":480,"elapsed":2235838,"user":{"displayName":"Yu Wang","userId":"02802289530208391889"}}},"outputs":[{"metadata":{"tags":null},"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n","The secret `HF_TOKEN` does not exist in your Colab secrets.\n","To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n","You will be 
able to reuse this secret in all of your notebooks.\n","Please note that authentication is recommended but still optional to access public models or datasets.\n","  warnings.warn(\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"1399ded814714874bf7622550ac07423","version_major":2,"version_minor":0},"text/plain":["tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c7c5c74e696e464e98138e19c31eab75","version_major":2,"version_minor":0},"text/plain":["vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"971cd36984984e878aaade86e425cf04","version_major":2,"version_minor":0},"text/plain":["merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9eafe09e0b1e4e9a947c7800ea5e468d","version_major":2,"version_minor":0},"text/plain":["tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"8fd6b9203631400d8d247b086041a865","version_major":2,"version_minor":0},"text/plain":["config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"metadata":{"tags":null},"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/sklearn/base.py:318: UserWarning: Trying to unpickle estimator LogisticRegression from version 0.24.1 when using version 1.2.2. This might lead to breaking code or invalid results. Use at your own risk. 
For more info please refer to:\n","https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n","  warnings.warn(\n","<ipython-input-2-e9d4338555d5>:80: DeprecationWarning: Please use `csr_matrix` from the `scipy.sparse` namespace, the `scipy.sparse.csr` namespace is deprecated.\n","  cf_tokenizer = pickle.load(doc)\n","/usr/local/lib/python3.10/dist-packages/sklearn/base.py:318: UserWarning: Trying to unpickle estimator TfidfTransformer from version 0.24.1 when using version 1.2.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n","https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n","  warnings.warn(\n","/usr/local/lib/python3.10/dist-packages/sklearn/base.py:318: UserWarning: Trying to unpickle estimator TfidfVectorizer from version 0.24.1 when using version 1.2.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n","https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n","  warnings.warn(\n"]},{"metadata":{"tags":null},"name":"stdout","output_type":"stream","text":["# classes 8\n","2915 625 625\n","# classes in train 8\n","# classes in dev 8\n","# classes in test 8\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"3087ed5723b1459ba2761c95bfecdad6","version_major":2,"version_minor":0},"text/plain":["model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"metadata":{"tags":null},"name":"stderr","output_type":"stream","text":["Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for 
predictions and inference.\n","Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]},{"data":{"text/html":["\n","    <div>\n","      \n","      <progress value='3660' max='3660' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [3660/3660 24:21, Epoch 20/20]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Epoch</th>\n","      <th>Training Loss</th>\n","      <th>Validation Loss</th>\n","      <th>Accuracy</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <td>1</td>\n","      <td>1.157300</td>\n","      <td>1.166122</td>\n","      <td>0.606400</td>\n","    </tr>\n","    <tr>\n","      <td>2</td>\n","      <td>0.987000</td>\n","      <td>1.078287</td>\n","      <td>0.616000</td>\n","    </tr>\n","    <tr>\n","      <td>3</td>\n","      <td>0.791400</td>\n","      <td>1.125870</td>\n","      <td>0.622400</td>\n","    </tr>\n","    <tr>\n","      <td>4</td>\n","      <td>0.515800</td>\n","      <td>1.167879</td>\n","      <td>0.640000</td>\n","    </tr>\n","    <tr>\n","      <td>5</td>\n","      <td>0.338900</td>\n","      <td>1.281621</td>\n","      <td>0.641600</td>\n","    </tr>\n","    <tr>\n","      <td>6</td>\n","      <td>0.245300</td>\n","      <td>1.414188</td>\n","      <td>0.636800</td>\n","    </tr>\n","    <tr>\n","      <td>7</td>\n","      <td>0.182500</td>\n","      <td>1.613628</td>\n","      <td>0.628800</td>\n","    </tr>\n","    <tr>\n","      <td>8</td>\n","      <td>0.211800</td>\n","      <td>2.038123</td>\n","      <td>0.617600</td>\n","    </tr>\n","    <tr>\n","      <td>9</td>\n","      
<td>0.138300</td>\n","      <td>2.041278</td>\n","      <td>0.633600</td>\n","    </tr>\n","    <tr>\n","      <td>10</td>\n","      <td>0.044300</td>\n","      <td>2.270245</td>\n","      <td>0.624000</td>\n","    </tr>\n","    <tr>\n","      <td>11</td>\n","      <td>0.096200</td>\n","      <td>2.270992</td>\n","      <td>0.638400</td>\n","    </tr>\n","    <tr>\n","      <td>12</td>\n","      <td>0.004000</td>\n","      <td>2.562562</td>\n","      <td>0.624000</td>\n","    </tr>\n","    <tr>\n","      <td>13</td>\n","      <td>0.011200</td>\n","      <td>2.651264</td>\n","      <td>0.636800</td>\n","    </tr>\n","    <tr>\n","      <td>14</td>\n","      <td>0.001100</td>\n","      <td>2.545812</td>\n","      <td>0.648000</td>\n","    </tr>\n","    <tr>\n","      <td>15</td>\n","      <td>0.005200</td>\n","      <td>2.603889</td>\n","      <td>0.638400</td>\n","    </tr>\n","    <tr>\n","      <td>16</td>\n","      <td>0.007700</td>\n","      <td>2.647458</td>\n","      <td>0.654400</td>\n","    </tr>\n","    <tr>\n","      <td>17</td>\n","      <td>0.004000</td>\n","      <td>2.669226</td>\n","      <td>0.648000</td>\n","    </tr>\n","    <tr>\n","      <td>18</td>\n","      <td>0.000500</td>\n","      <td>2.655745</td>\n","      <td>0.662400</td>\n","    </tr>\n","    <tr>\n","      <td>19</td>\n","      <td>0.000500</td>\n","      <td>2.635825</td>\n","      <td>0.659200</td>\n","    </tr>\n","    <tr>\n","      <td>20</td>\n","      <td>0.064500</td>\n","      <td>2.667681</td>\n","      <td>0.657600</td>\n","    </tr>\n","  </tbody>\n","</table><p>"],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"metadata":{"tags":null},"name":"stderr","output_type":"stream","text":["<ipython-input-2-e9d4338555d5>:11: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. 
Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n","  metric = load_metric(\"accuracy\")\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"98b842e25a7d4f80ab52e8681d1c59b5","version_major":2,"version_minor":0},"text/plain":["Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"metadata":{"tags":null},"name":"stdout","output_type":"stream","text":["# classes 8\n","2915 625 625\n","# classes in train 8\n","# classes in dev 8\n","# classes in test 8\n"]},{"metadata":{"tags":null},"name":"stderr","output_type":"stream","text":["Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]},{"data":{"text/html":["\n","    <div>\n","      \n","      <progress value='1853' max='3660' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [1853/3660 12:33 < 12:15, 2.46 it/s, Epoch 10.12/20]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Epoch</th>\n","      <th>Training Loss</th>\n","      <th>Validation Loss</th>\n","      <th>Accuracy</th>\n","    
</tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <td>1</td>\n","      <td>1.230800</td>\n","      <td>1.185395</td>\n","      <td>0.587200</td>\n","    </tr>\n","    <tr>\n","      <td>2</td>\n","      <td>1.219100</td>\n","      <td>1.051044</td>\n","      <td>0.651200</td>\n","    </tr>\n","    <tr>\n","      <td>3</td>\n","      <td>0.660400</td>\n","      <td>1.115361</td>\n","      <td>0.638400</td>\n","    </tr>\n","    <tr>\n","      <td>4</td>\n","      <td>0.471200</td>\n","      <td>1.159856</td>\n","      <td>0.651200</td>\n","    </tr>\n","    <tr>\n","      <td>5</td>\n","      <td>0.372000</td>\n","      <td>1.319657</td>\n","      <td>0.641600</td>\n","    </tr>\n","    <tr>\n","      <td>6</td>\n","      <td>0.144600</td>\n","      <td>1.557757</td>\n","      <td>0.632000</td>\n","    </tr>\n","    <tr>\n","      <td>7</td>\n","      <td>0.329400</td>\n","      <td>1.634227</td>\n","      <td>0.648000</td>\n","    </tr>\n","    <tr>\n","      <td>8</td>\n","      <td>0.114600</td>\n","      <td>2.076972</td>\n","      <td>0.641600</td>\n","    </tr>\n","    <tr>\n","      <td>9</td>\n","      <td>0.099900</td>\n","      <td>2.109875</td>\n","      <td>0.628800</td>\n","    </tr>\n","    <tr>\n","      <td>10</td>\n","      <td>0.110600</td>\n","      <td>2.214436</td>\n","      <td>0.630400</td>\n","    </tr>\n","  </tbody>\n","</table><p>"],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n","    <div>\n","      \n","      <progress value='3660' max='3660' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [3660/3660 24:46, Epoch 20/20]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Epoch</th>\n","      <th>Training Loss</th>\n","      <th>Validation Loss</th>\n","      
<th>Accuracy</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <td>1</td>\n","      <td>1.230800</td>\n","      <td>1.185395</td>\n","      <td>0.587200</td>\n","    </tr>\n","    <tr>\n","      <td>2</td>\n","      <td>1.219100</td>\n","      <td>1.051044</td>\n","      <td>0.651200</td>\n","    </tr>\n","    <tr>\n","      <td>3</td>\n","      <td>0.660400</td>\n","      <td>1.115361</td>\n","      <td>0.638400</td>\n","    </tr>\n","    <tr>\n","      <td>4</td>\n","      <td>0.471200</td>\n","      <td>1.159856</td>\n","      <td>0.651200</td>\n","    </tr>\n","    <tr>\n","      <td>5</td>\n","      <td>0.372000</td>\n","      <td>1.319657</td>\n","      <td>0.641600</td>\n","    </tr>\n","    <tr>\n","      <td>6</td>\n","      <td>0.144600</td>\n","      <td>1.557757</td>\n","      <td>0.632000</td>\n","    </tr>\n","    <tr>\n","      <td>7</td>\n","      <td>0.329400</td>\n","      <td>1.634227</td>\n","      <td>0.648000</td>\n","    </tr>\n","    <tr>\n","      <td>8</td>\n","      <td>0.114600</td>\n","      <td>2.076972</td>\n","      <td>0.641600</td>\n","    </tr>\n","    <tr>\n","      <td>9</td>\n","      <td>0.099900</td>\n","      <td>2.109875</td>\n","      <td>0.628800</td>\n","    </tr>\n","    <tr>\n","      <td>10</td>\n","      <td>0.110600</td>\n","      <td>2.214436</td>\n","      <td>0.630400</td>\n","    </tr>\n","    <tr>\n","      <td>11</td>\n","      <td>0.003100</td>\n","      <td>2.369902</td>\n","      <td>0.628800</td>\n","    </tr>\n","    <tr>\n","      <td>12</td>\n","      <td>0.004000</td>\n","      <td>2.346757</td>\n","      <td>0.648000</td>\n","    </tr>\n","    <tr>\n","      <td>13</td>\n","      <td>0.041400</td>\n","      <td>2.473668</td>\n","      <td>0.641600</td>\n","    </tr>\n","    <tr>\n","      <td>14</td>\n","      <td>0.003300</td>\n","      <td>2.520145</td>\n","      <td>0.635200</td>\n","    </tr>\n","    <tr>\n","      <td>15</td>\n","      <td>0.001800</td>\n","      
<td>2.554074</td>\n","      <td>0.656000</td>\n","    </tr>\n","    <tr>\n","      <td>16</td>\n","      <td>0.000600</td>\n","      <td>2.656835</td>\n","      <td>0.651200</td>\n","    </tr>\n","    <tr>\n","      <td>17</td>\n","      <td>0.000500</td>\n","      <td>2.679398</td>\n","      <td>0.649600</td>\n","    </tr>\n","    <tr>\n","      <td>18</td>\n","      <td>0.000500</td>\n","      <td>2.690604</td>\n","      <td>0.656000</td>\n","    </tr>\n","    <tr>\n","      <td>19</td>\n","      <td>0.000500</td>\n","      <td>2.699834</td>\n","      <td>0.654400</td>\n","    </tr>\n","    <tr>\n","      <td>20</td>\n","      <td>0.000900</td>\n","      <td>2.706052</td>\n","      <td>0.651200</td>\n","    </tr>\n","  </tbody>\n","</table><p>"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":[]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["# classes 8\n","2915 625 625\n","# classes in train 8\n","# classes in dev 8\n","# classes in test 8\n"]},{"output_type":"stream","name":"stderr","text":["Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n","    <div>\n","      \n","      <progress 
value='3660' max='3660' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [3660/3660 24:46, Epoch 20/20]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Epoch</th>\n","      <th>Training Loss</th>\n","      <th>Validation Loss</th>\n","      <th>Accuracy</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <td>1</td>\n","      <td>1.348600</td>\n","      <td>1.236128</td>\n","      <td>0.569600</td>\n","    </tr>\n","    <tr>\n","      <td>2</td>\n","      <td>1.086900</td>\n","      <td>1.165091</td>\n","      <td>0.603200</td>\n","    </tr>\n","    <tr>\n","      <td>3</td>\n","      <td>0.697600</td>\n","      <td>1.229873</td>\n","      <td>0.624000</td>\n","    </tr>\n","    <tr>\n","      <td>4</td>\n","      <td>0.718300</td>\n","      <td>1.324241</td>\n","      <td>0.606400</td>\n","    </tr>\n","    <tr>\n","      <td>5</td>\n","      <td>0.360800</td>\n","      <td>1.455510</td>\n","      <td>0.604800</td>\n","    </tr>\n","    <tr>\n","      <td>6</td>\n","      <td>0.247000</td>\n","      <td>1.639510</td>\n","      <td>0.628800</td>\n","    </tr>\n","    <tr>\n","      <td>7</td>\n","      <td>0.093100</td>\n","      <td>1.839954</td>\n","      <td>0.616000</td>\n","    </tr>\n","    <tr>\n","      <td>8</td>\n","      <td>0.081300</td>\n","      <td>2.256215</td>\n","      <td>0.590400</td>\n","    </tr>\n","    <tr>\n","      <td>9</td>\n","      <td>0.124200</td>\n","      <td>2.467415</td>\n","      <td>0.604800</td>\n","    </tr>\n","    <tr>\n","      <td>10</td>\n","      <td>0.088800</td>\n","      <td>2.653999</td>\n","      <td>0.593600</td>\n","    </tr>\n","    <tr>\n","      <td>11</td>\n","      <td>0.033100</td>\n","      <td>2.662699</td>\n","      <td>0.608000</td>\n","    </tr>\n","    <tr>\n","      <td>12</td>\n","      <td>0.006500</td>\n","      <td>2.694501</td>\n","      <td>0.604800</td>\n","    
</tr>\n","    <tr>\n","      <td>13</td>\n","      <td>0.001300</td>\n","      <td>2.811574</td>\n","      <td>0.616000</td>\n","    </tr>\n","    <tr>\n","      <td>14</td>\n","      <td>0.001600</td>\n","      <td>2.909949</td>\n","      <td>0.612800</td>\n","    </tr>\n","    <tr>\n","      <td>15</td>\n","      <td>0.001000</td>\n","      <td>2.983147</td>\n","      <td>0.598400</td>\n","    </tr>\n","    <tr>\n","      <td>16</td>\n","      <td>0.000900</td>\n","      <td>3.031042</td>\n","      <td>0.609600</td>\n","    </tr>\n","    <tr>\n","      <td>17</td>\n","      <td>0.000700</td>\n","      <td>3.116341</td>\n","      <td>0.596800</td>\n","    </tr>\n","    <tr>\n","      <td>18</td>\n","      <td>0.000700</td>\n","      <td>3.113076</td>\n","      <td>0.601600</td>\n","    </tr>\n","    <tr>\n","      <td>19</td>\n","      <td>0.013800</td>\n","      <td>3.121142</td>\n","      <td>0.606400</td>\n","    </tr>\n","    <tr>\n","      <td>20</td>\n","      <td>0.000500</td>\n","      <td>3.094860</td>\n","      <td>0.608000</td>\n","    </tr>\n","  </tbody>\n","</table><p>"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":[]},"metadata":{}}],"source":["import random\n","import time\n","\n","from datasets import load_metric\n","import numpy as np\n","import pandas as pd\n","import pickle\n","\n","\n","def compute_metrics(eval_preds):\n","    metric = load_metric(\"accuracy\")\n","    logits, labels = eval_preds\n","    predictions = np.argmax(logits, axis=-1)\n","    return metric.compute(predictions=predictions, references=labels)\n","\n","start = time.time()\n","directory = \"./data_and_models/\"\n","all_df = pd.read_csv(directory+\"target_corpus.csv\")\n","\n","import torch\n","class PSCDataset(torch.utils.data.Dataset):\n","    def __init__(self, encodings, labels):\n","        self.encodings = encodings\n","        self.labels = labels\n","\n","    def __getitem__(self, idx):\n","  
      item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n","        item['labels'] = torch.tensor(self.labels[idx])\n","        return item\n","\n","    def __len__(self):\n","        return len(self.labels)\n","\n","import sklearn\n","def top_k_accuracy(top_k, predictions, labels):\n","  assert len(predictions) == len(labels)\n","  total = 0\n","  correct = 0\n","  for i in range(len(predictions)):\n","    total += 1\n","    prediction = []\n","    for j, k in enumerate(predictions[i]):\n","      prediction.append([j, k]) # k is the value\n","    prediction.sort(key = lambda x: -x[1])\n","    for j, _ in prediction[:top_k]:\n","      if j == labels[i]:\n","        correct += 1\n","        break\n","  return correct/total\n","\n","import transformers\n","from transformers import RobertaTokenizerFast\n","from transformers import RobertaForSequenceClassification, TrainingArguments, Trainer\n","tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')\n","\n","\n","metrics = [\"Top-1 accuracy/F1 micro\", \"Top-3 accuracy\", \"Top-5 accuracy\", \"Balanced accuracy\", \"F1 macro\"]\n","seeds = [11, 12, 13]\n","epochs = 20\n","\n","tasks = {\n","    # \"44\": {\n","    #     \"number_of_labels\": 42,\n","    #      \"label_column\": 1,\n","    # },\n","    \"8\": {\n","        \"number_of_labels\": 8,\n","        \"label_column\": 2,\n","    }\n","}\n","\n","def compute(task):\n","  t1, t3, t5, ba, f1 = [],[],[],[],[]\n","  baseline_t1, baseline_t3, baseline_t5, baseline_ba, baseline_f1 = [], [], [], [], []\n","\n","  ###### load the cross-domain classifier\n","  with open(directory + \"logistic_model_\" + task + \".pkl\", \"rb\") as doc:\n","          model = pickle.load(doc)\n","  with open(directory + \"tfidf_\" + task + \".pkl\", \"rb\") as doc:\n","          cf_tokenizer = pickle.load(doc)\n","  class_mapper = {}\n","  class_reverse_mapper = {}\n","  for i, topic in enumerate(model.classes_):\n","      class_mapper[topic.replace(\" 
\", \".\").replace(\"-\", \".\")] = i\n","      class_reverse_mapper[i] = topic\n","\n","  for seed in seeds:\n","    np.random.seed(seed)\n","    torch.manual_seed(seed)\n","    random.seed(seed)\n","\n","    import csv\n","    from sklearn.model_selection import train_test_split\n","\n","    index = -1\n","    classes = {}\n","    texts = []\n","    labels = []\n","    lm_reverse_mapper = {}\n","    with open(directory + \"target_corpus.csv\") as doc:\n","      reader = csv.reader(doc)\n","      next(reader)\n","      for row in reader:\n","        topic = row[tasks[task][\"label_column\"]]\n","        if topic not in classes:\n","          index += 1\n","          classes[topic] = index\n","          lm_reverse_mapper[index] = topic.capitalize()\n","        labels.append(classes[topic])\n","        texts.append(row[0])\n","    print(\"# classes\", len(classes))\n","    X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=625, random_state=seed)\n","    X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train, test_size=625, random_state=seed)\n","    print(len(X_train), len(X_dev), len(X_test))\n","    print(\"# classes in train\", len(set(y_train)))\n","    print(\"# classes in dev\", len(set(y_dev)))\n","    print(\"# classes in test\", len(set(y_test)))\n","\n","    mlength = 512\n","    train_encodings = tokenizer(X_train, truncation=True, padding=True, max_length=mlength)\n","    dev_encodings = tokenizer(X_dev, truncation=True, padding=True, max_length = mlength)\n","    test_encodings = tokenizer(X_test, truncation=True, padding=True, max_length= mlength)\n","\n","\n","    train_dataset = PSCDataset(train_encodings, y_train)\n","    dev_dataset = PSCDataset(dev_encodings, y_dev)\n","    test_dataset = PSCDataset(test_encodings, y_test)\n","\n","    training_args = TrainingArguments(\n","        output_dir=\"./results\",          # output directory\n","        num_train_epochs=epochs,         # total number of training 
epochs\n","        per_device_train_batch_size=16,  # batch size per device during training\n","        per_device_eval_batch_size=64,   # batch size for evaluation\n","        warmup_steps=0,                  # number of warmup steps for learning rate scheduler\n","        weight_decay=0.01,               # strength of weight decay\n","        logging_dir='./logs',            # directory for storing logs\n","        logging_steps=10,\n","        learning_rate = 2e-5,\n","        save_strategy= \"epoch\",\n","        evaluation_strategy=\"epoch\",\n","        load_best_model_at_end= True,\n","        seed = seed,\n","    )\n","\n","    def model_init():\n","        return RobertaForSequenceClassification.from_pretrained(\"roberta-base\", num_labels=tasks[task][\"number_of_labels\"])\n","    trainer = Trainer(\n","        model_init=model_init,               # the instantiated 🤗 Transformers model to be trained\n","        args=training_args,                  # training arguments, defined above\n","        train_dataset=train_dataset,         # training dataset\n","        eval_dataset=dev_dataset,            # evaluation dataset\n","        compute_metrics=compute_metrics,     # compute_metrics\n","        )\n","\n","    trainer.train()\n","    predictions = trainer.predict(test_dataset)\n","    preds = np.argmax(predictions.predictions, axis=-1)\n","\n","    t1.append(top_k_accuracy(1, predictions.predictions, test_dataset.labels))\n","    t3.append(top_k_accuracy(3, predictions.predictions, test_dataset.labels))\n","    t5.append(top_k_accuracy(5, predictions.predictions, test_dataset.labels))\n","    ba.append(sklearn.metrics.balanced_accuracy_score(test_dataset.labels, preds))\n","    f1.append(sklearn.metrics.f1_score(test_dataset.labels, preds, average = \"macro\"))\n","\n","    df = all_df[all_df[\"text\"].isin(X_test)]\n","    X = df['text']\n","    Y = list(df[\"topic_\"+ task].transform(lambda x: class_mapper[x]))\n","\n","    Xtfidf = 
cf_tokenizer.transform(X)\n","\n","    preds = model.predict(Xtfidf)\n","    preds = [class_mapper[topic.replace(\" \", \".\").replace(\"-\", \".\")] for topic in preds]\n","    policy_probs = model.predict_proba(Xtfidf)\n","\n","    baseline_t1.append(top_k_accuracy(1, policy_probs, Y))\n","    baseline_t3.append(top_k_accuracy(3, policy_probs, Y))\n","    baseline_t5.append(top_k_accuracy(5, policy_probs, Y))\n","    baseline_ba.append(sklearn.metrics.balanced_accuracy_score(Y, preds))\n","    baseline_f1.append(sklearn.metrics.f1_score(Y, preds, average = \"macro\"))\n","\n","  result = {}\n","  for metric, baseline, experiment in zip(metrics, [baseline_t1, baseline_t3, baseline_t5, baseline_ba, baseline_f1], [t1, t3, t5, ba, f1]):\n","    result[metric] = [np.mean(baseline), np.std(baseline), np.mean(experiment), np.std(experiment)]\n","  return result\n","\n","results = {}\n","for task in tasks:\n","  result = compute(task)\n","  results[task] = result\n","\n","np.save( directory + \"table_1_results.npy\", results)\n"]},{"cell_type":"code","execution_count":3,"metadata":{"id":"lLXYkZy1X7dh","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1709410409056,"user_tz":480,"elapsed":10,"user":{"displayName":"Yu Wang","userId":"02802289530208391889"}},"outputId":"6e07f86b-2cef-49b1-bf34-fef8e4bcf24c"},"outputs":[{"output_type":"stream","name":"stdout","text":["Top-1 accuracy/F1 micro & 0.512 (0.007) &  \\textbf{0.643 (0.007)}\\\\\n","Top-3 accuracy & 0.821 (0.001) &  \\textbf{0.899 (0.001)}\\\\\n","Top-5 accuracy & 0.917 (0.007) &  \\textbf{0.968 (0.007)}\\\\\n","Balanced accuracy & 0.465 (0.004) &  \\textbf{0.592 (0.004)}\\\\\n","F1 macro & 0.456 (0.011) &  \\textbf{0.584 (0.011)}\\\\\n"]}],"source":["def preprocess_result(result):\n","  output = []\n","  outperform = result[2] > result[0]\n","  for i, j in enumerate(result):\n","    j = str(round(j, 3))\n","    if len(j) < 5:\n","      j += \"0\" * (5-len(j))\n","    if i % 2 
== 1: # standard deviation\n","      j = \"(\" + j + \")\"\n","      if i == 1:\n","        j += \" & \"\n","    if outperform:\n","      if i == 2:\n","        j = \"\\\\textbf{\" + j\n","      if i == 3:\n","        j += \"}\"\n","    else:\n","      if i == 0:\n","        j = \"\\\\textbf{\" + j\n","      if i == 1:\n","        j += \"}\"\n","    output.append(j)\n","  return \" \".join(output)\n","\n","for metric in metrics:\n","  output = [metric]\n","  for task in tasks:\n","    output.append(preprocess_result(results[task][metric]))\n","  print(\" & \".join(output) + \"\\\\\\\\\")"]},{"cell_type":"code","execution_count":4,"metadata":{"id":"Yrtzj-vwK9h1","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1709410409057,"user_tz":480,"elapsed":7,"user":{"displayName":"Yu Wang","userId":"02802289530208391889"}},"outputId":"cf400111-b30b-4231-e4bf-18715103ac7f"},"outputs":[{"output_type":"stream","name":"stdout","text":["The program took 74.0 minutes in total.\n"]}],"source":["end = time.time()\n","print(f\"The program took {(end - start) // 60} minutes in total.\")"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"DPOufc4bLO1o","executionInfo":{"status":"ok","timestamp":1709410409732,"user_tz":480,"elapsed":678,"user":{"displayName":"Yu Wang","userId":"02802289530208391889"}}},"outputs":[],"source":["from google.colab import runtime\n","runtime.unassign()"]}],"metadata":{"accelerator":"GPU","colab":{"provenance":[],"gpuClass":"premium"},"gpuClass":"premium","kernelspec":{"display_name":"Python 
3","name":"python3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"1399ded814714874bf7622550ac07423":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b96c7115fa504abdb76bdbfffd31e0af","IPY_MODEL_35c5287188c549f6b9ce0f777149f2dc","IPY_MODEL_267e881d0a1a4a6fbd8c21deb519a260"],"layout":"IPY_MODEL_5c84b97e84034bec980580f33a9a6bfc"}},"b96c7115fa504abdb76bdbfffd31e0af":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_72072c7e97114dde8271c781b39155a1","placeholder":"​","style":"IPY_MODEL_6a3692f7dff64b2a82c265065b15c446","value":"tokenizer_config.json: 
100%"}},"35c5287188c549f6b9ce0f777149f2dc":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e78fabe0c6a74780911b3c2b6f33df2c","max":25,"min":0,"orientation":"horizontal","style":"IPY_MODEL_5dc13902568249ff85e516f2a17333c1","value":25}},"267e881d0a1a4a6fbd8c21deb519a260":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a4be2de6110b4656914c7768d07cae1d","placeholder":"​","style":"IPY_MODEL_a2f0a5652ca64036a259030b0967fbbc","value":" 25.0/25.0 [00:00&lt;00:00, 
2.21kB/s]"}},"5c84b97e84034bec980580f33a9a6bfc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"72072c7e97114dde8271c781b39155a1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6a3692f7dff64b2a82c265065b15c446":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e78fabe0c6a74780911b3c2b6f33df2c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5dc13902568249ff85e516f2a17333c1":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"a4be2de6110b4656914c7768d07cae1d":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a2f0a5652ca64036a259030b0967fbbc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c7c5c74e696e464e98138e19c31eab75":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_619f3702180f4869a2170de74d2fdad0","IPY_MODEL_402fc3ce1f674c1eb7ea97c715802e7f","IPY_MODEL_d274b4d469774cad91decd327312ac94"],"layout":"IPY_MODEL_352bab9891af429c866847d7693ef76d"
}},"619f3702180f4869a2170de74d2fdad0":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_997c1ce0e8f44d6c968e9dd6e3ae6ef0","placeholder":"​","style":"IPY_MODEL_e0abcb7cfc6c4e028ac065cca4e6f121","value":"vocab.json: 100%"}},"402fc3ce1f674c1eb7ea97c715802e7f":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_aec2bb8e4738478a9e8e5d29f6ca56d2","max":898823,"min":0,"orientation":"horizontal","style":"IPY_MODEL_6de45df067ed41878d2a670b88140511","value":898823}},"d274b4d469774cad91decd327312ac94":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_33450be48ed54dc8818dadff77ed30ff","placeholder":"​","style":"IPY_MODEL_07a5d73897da452fb3a09ebd52d93e68","value":" 899k/899k [00:00&lt;00:00, 
3.57MB/s]"}},"352bab9891af429c866847d7693ef76d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"997c1ce0e8f44d6c968e9dd6e3ae6ef0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e0abcb7cfc6c4e028ac065cca4e6f121":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"aec2bb8e4738478a9e8e5d29f6ca56d2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6de45df067ed41878d2a670b88140511":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"33450be48ed54dc8818dadff77ed30ff":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"07a5d73897da452fb3a09ebd52d93e68":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"971cd36984984e878aaade86e425cf04":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ba97db9e81ac4540b12d385835317bd2","IPY_MODEL_0193884297004ef2ae6bb4c67cf31d9f","IPY_MODEL_170f192382f14f299e9b5e91e726892b"],"layout":"IPY_MODEL_37ea044cf91d4303a3cc68ba87972311"
}},"ba97db9e81ac4540b12d385835317bd2":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ebd9d09cd0584faeb445e37293d24c8e","placeholder":"​","style":"IPY_MODEL_451e43094a8c4845adc656e1f898b9dd","value":"merges.txt: 100%"}},"0193884297004ef2ae6bb4c67cf31d9f":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0b51cfc000e943e4b244d086f6f2033d","max":456318,"min":0,"orientation":"horizontal","style":"IPY_MODEL_8393ec99d3d148ddb9c15caf69828c4f","value":456318}},"170f192382f14f299e9b5e91e726892b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_98d4960a9b1a4c75970b53bbe0955e06","placeholder":"​","style":"IPY_MODEL_b69bd3def77c4f308d5cdd01972747f4","value":" 456k/456k [00:00&lt;00:00, 
7.57MB/s]"}},"37ea044cf91d4303a3cc68ba87972311":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ebd9d09cd0584faeb445e37293d24c8e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"451e43094a8c4845adc656e1f898b9dd":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0b51cfc000e943e4b244d086f6f2033d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8393ec99d3d148ddb9c15caf69828c4f":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"98d4960a9b1a4c75970b53bbe0955e06":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b69bd3def77c4f308d5cdd01972747f4":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9eafe09e0b1e4e9a947c7800ea5e468d":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2ddcd05359464b1bb66b6330276f88fb","IPY_MODEL_ceee2d466ee34cbabf29b4d1f27b6367","IPY_MODEL_518ae1d093f54f238d21ba6154c55045"],"layout":"IPY_MODEL_1200ef5d626d4adb9a60cac26c4a91a4"
}},"2ddcd05359464b1bb66b6330276f88fb":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3712dc0e37904c5eaad5215a4b8c57ec","placeholder":"​","style":"IPY_MODEL_24adcdeebbcc45c2a3e766851d8985a5","value":"tokenizer.json: 100%"}},"ceee2d466ee34cbabf29b4d1f27b6367":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_c7f057f02e854eb5ab8195b432515105","max":1355863,"min":0,"orientation":"horizontal","style":"IPY_MODEL_820d3affb6fb426fad1aa75190ab1f92","value":1355863}},"518ae1d093f54f238d21ba6154c55045":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e4ea68ec942b4d1b893f3cd0a792285d","placeholder":"​","style":"IPY_MODEL_ed870e26cd614d85abb4491c153dc2d6","value":" 1.36M/1.36M [00:00&lt;00:00, 
5.35MB/s]"}},"1200ef5d626d4adb9a60cac26c4a91a4":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3712dc0e37904c5eaad5215a4b8c57ec":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"24adcdeebbcc45c2a3e766851d8985a5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c7f057f02e854eb5ab8195b432515105":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"820d3affb6fb426fad1aa75190ab1f92":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e4ea68ec942b4d1b893f3cd0a792285d":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ed870e26cd614d85abb4491c153dc2d6":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8fd6b9203631400d8d247b086041a865":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2e77c4af19c64a89a691208338ce81e1","IPY_MODEL_f6a945066f094c409bdb9fea3a43d33e","IPY_MODEL_d6bd72852b954c758a4c0faef0ab9f50"],"layout":"IPY_MODEL_520867658b7c47b3925fcf74c98f7830"
}},"2e77c4af19c64a89a691208338ce81e1":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_44dc7af7b5904ce79ce9711fea820298","placeholder":"​","style":"IPY_MODEL_1cfb07d9ca074a4facf9973c93dbba43","value":"config.json: 100%"}},"f6a945066f094c409bdb9fea3a43d33e":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_3ba96d9f2ba3448caa2504146e51fddc","max":481,"min":0,"orientation":"horizontal","style":"IPY_MODEL_ab4bc3e607fd4899b2deb6e16697faf1","value":481}},"d6bd72852b954c758a4c0faef0ab9f50":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7cefbaf4b32140989d5c9331b61cfbe3","placeholder":"​","style":"IPY_MODEL_6f475b17907e4cd998968c74106f909a","value":" 481/481 [00:00&lt;00:00, 
39.0kB/s]"}},"520867658b7c47b3925fcf74c98f7830":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"44dc7af7b5904ce79ce9711fea820298":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1cfb07d9ca074a4facf9973c93dbba43":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3ba96d9f2ba3448caa2504146e51fddc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ab4bc3e607fd4899b2deb6e16697faf1":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"7cefbaf4b32140989d5c9331b61cfbe3":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6f475b17907e4cd998968c74106f909a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3087ed5723b1459ba2761c95bfecdad6":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ba71873c841747d496a73e562cf9aeb6","IPY_MODEL_9580f30b3f0d4addaacdce9d125d3fd0","IPY_MODEL_24e6f19792454c54a5d5e30307eaa047"],"layout":"IPY_MODEL_41d7576cd212489d92897219fa1c1d00"
}},"ba71873c841747d496a73e562cf9aeb6":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_fd6e6b64eefc4a06aec8f06cbbfdf6e1","placeholder":"​","style":"IPY_MODEL_c3652aae2e40413a88c97471e5e6ad81","value":"model.safetensors: 100%"}},"9580f30b3f0d4addaacdce9d125d3fd0":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_713c2911fe5b423d882520e5fdb37697","max":498818054,"min":0,"orientation":"horizontal","style":"IPY_MODEL_894a2e47af1242ba8aae550ff504b912","value":498818054}},"24e6f19792454c54a5d5e30307eaa047":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9eeaa650a28b4417a334cd5322690d90","placeholder":"​","style":"IPY_MODEL_5808badb3c65493c9086a9f8d6e1bea6","value":" 499M/499M [00:01&lt;00:00, 
446MB/s]"}},"41d7576cd212489d92897219fa1c1d00":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fd6e6b64eefc4a06aec8f06cbbfdf6e1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"o
verflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c3652aae2e40413a88c97471e5e6ad81":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"713c2911fe5b423d882520e5fdb37697":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"894a2e47af1242ba8aae550ff504b912":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"9eeaa650a28b4417a334cd5322690d90":{"model_mo
dule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5808badb3c65493c9086a9f8d6e1bea6":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"98b842e25a7d4f80ab52e8681d1c59b5":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_8ab9f5047d3c4275989d5dcde5c78e41","IPY_MODEL_c7468e53c6024fe3b95d7acca8ae9af8","IPY_MODEL_44e1569086d5461ab4b05dfc0f85167e"],"layout":"IPY_MODEL_625d1593c44e456b9ec24540c120445d"}
},"8ab9f5047d3c4275989d5dcde5c78e41":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_496cec653a024b35805aa1cf2abcb5ab","placeholder":"​","style":"IPY_MODEL_87603985724e4a17946c17cf1ff32ca3","value":"Downloading builder script: "}},"c7468e53c6024fe3b95d7acca8ae9af8":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a8a7c7eb2f5944328dc764de3a6ddcab","max":1652,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b38d5afa27bd42da899b097a79d58cbb","value":1652}},"44e1569086d5461ab4b05dfc0f85167e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_318870872ce24d3b8e09bab1c1346107","placeholder":"​","style":"IPY_MODEL_cec6b2f3b57d4f07a15d6806608ff010","value":" 4.21k/? 
[00:00&lt;00:00, 334kB/s]"}},"625d1593c44e456b9ec24540c120445d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"496cec653a024b35805aa1cf2abcb5ab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"ov
erflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"87603985724e4a17946c17cf1ff32ca3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a8a7c7eb2f5944328dc764de3a6ddcab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b38d5afa27bd42da899b097a79d58cbb":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"318870872ce24d3b8e09bab1c13
46107":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cec6b2f3b57d4f07a15d6806608ff010":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0}